library(fredr)
library(tidyverse)
library(forecast)
library(ggplot2)
library(fpp3)
library(tsibble)
library(gtrendsR)
library(tidyquant)
library(PerformanceAnalytics)

# set api key
fredr_set_key('2ce72ebb6c7e053880ec7ad5a950237f')
# gather data
start_date <- "1978-01-01"
end_date <- "2023-11-01"

# UMICH Consumer Sentiment Index: https://fred.stlouisfed.org/series/UMCSENT
consumer_sentiment <- fredr(
  series_id = "UMCSENT",
  observation_start = as.Date(start_date),
  observation_end = as.Date(end_date),
  frequency = "m", # monthly
  units = "lin"
)

# Unemployment Rate: https://fred.stlouisfed.org/series/UNRATE
unemployment <- fredr(
  series_id = "UNRATE",
  observation_start = as.Date(start_date),
  observation_end = as.Date(end_date),
  frequency = "m", # monthly
  units = "lin"
)

# YOY % Change in Unemployment Rate: https://fred.stlouisfed.org/series/UNRATE
unemployment_change <- fredr(
  series_id = "UNRATE",
  observation_start = as.Date(start_date),
  observation_end = as.Date(end_date),
  frequency = "m", # monthly
  units = "pc1"
)

# YOY % Change in Real Income: https://fred.stlouisfed.org/series/DSPIC96
income <- fredr(
  series_id = "DSPIC96",
  observation_start = as.Date(start_date),
  observation_end = as.Date(end_date),
  frequency = "m", # monthly
  units = "pc1"
)

# YOY % Change in CPI: https://fred.stlouisfed.org/series/CPIAUCSL
cpi <- fredr(
  series_id = "CPIAUCSL",
  observation_start = as.Date(start_date),
  observation_end = as.Date(end_date),
  frequency = "m", # monthly
  units = "pc1"
)

# Avg 30-year Mortgage Rate: https://fred.stlouisfed.org/series/MORTGAGE30US
mortgage <- fredr(
  series_id = "MORTGAGE30US",
  observation_start = as.Date(start_date),
  observation_end = as.Date(end_date),
  frequency = "m", # monthly
  units = "lin"
)

# YOY % Change in Mortgage Rate: https://fred.stlouisfed.org/series/MORTGAGE30US
mortgage_change <- fredr(
  series_id = "MORTGAGE30US",
  observation_start = as.Date(start_date),
  observation_end = as.Date(end_date),
  frequency = "m", # monthly
  units = "pc1"
)

# YOY % Change in Median House Price: https://fred.stlouisfed.org/series/MSPUS
house <- fredr(
  series_id = "MSPUS",
  observation_start = as.Date(start_date),
  observation_end = as.Date(end_date),
  frequency = "q", # quarterly
  units = "pc1"
)

# YOY % Change in Personal Consumption: https://fred.stlouisfed.org/series/PCE
consumption <- fredr(
  series_id = "PCE",
  observation_start = as.Date(start_date),
  observation_end = as.Date(end_date),
  frequency = "m", # monthly
  units = "pc1"
)

# NBER Recession Indicator: https://fred.stlouisfed.org/series/USREC
recession_indicator <- fredr(
  series_id = "USREC",
  observation_start = as.Date(start_date),
  observation_end = as.Date(end_date),
  frequency = "m", # monthly
  units = "lin"
)

# YOY % Change in S&P 500 Index: https://finance.yahoo.com/quote/%5EGSPC?p=%5EGSPC
getSymbols("^GSPC", from = "1977-01-01",
           to = end_date,
           warnings = FALSE,
           auto.assign = TRUE,
           src = "yahoo")
## [1] "GSPC"
# YOY % Change in US Dollar Index: 
getSymbols("DX-Y.NYB", from = "1977-01-01",
           to = end_date,
           warnings = FALSE,
           auto.assign = TRUE,
           src = "yahoo")
## [1] "DX-Y.NYB"
# process fred data
process <- function(data, name) {
  data[name] <- data$value 
  
  data %>%
    select(date, all_of(name))
}

consumer_sentiment_modified <- process(consumer_sentiment, 'consumer_sentiment')
unemployment_modified <- process(unemployment, 'unemployment_rate')
unemployment_change_modified <- process(unemployment_change, 'unemployment_rate_change')
income_modified <- process(income, 'income')
cpi_modified <- process(cpi, 'cpi')
mortgage_modified <- process(mortgage, 'mortgage')
mortgage_change_modified <- process(mortgage_change, 'mortgage_change')
house_modified <- process(house, 'house')
consumption_modified <- process(consumption, 'consumption')
recession_indicator_modified <- process(recession_indicator, 'recession_indicator')

# breakout quarters to months for house data
house_modified <- house_modified %>%
  complete(date = seq.Date(min(date), max(date), by = "month")) %>%
  fill(house)

# process sp500 data
sp500_modified <- as.data.frame(GSPC) %>%
    rownames_to_column('date_day') %>%
    select(date_day, GSPC.Close) %>%
    mutate(date = as.Date(paste(substr(date_day, start = 1, stop = 7), "-01", sep=''))) %>%
    group_by(date) %>%
    summarize(avg_close = mean(GSPC.Close)) %>%
    mutate(sp500 = ((avg_close - lag(avg_close, 12)) / lag(avg_close, 12)) * 100) %>%
    select(-avg_close) %>%
    filter(date >= start_date)

# process dollar_index data
dollar_index_modified <- as.data.frame(`DX-Y.NYB`) %>%
    rownames_to_column('date_day') %>%
    select(date_day, `DX-Y.NYB.Close`) %>%
    filter(!is.na(`DX-Y.NYB.Close`)) %>%
    mutate(date = as.Date(paste(substr(date_day, start = 1, stop = 7), "-01", sep=''))) %>%
    group_by(date) %>%
    summarize(avg_close = mean(`DX-Y.NYB.Close`)) %>%
    mutate(dollar_index = ((avg_close - lag(avg_close, 12)) / lag(avg_close, 12)) * 100) %>%
    select(-avg_close) %>%
    filter(date >= start_date)
# join data

join <- consumer_sentiment_modified %>%
    left_join(unemployment_modified, by = c('date' = 'date')) %>%
    left_join(unemployment_change_modified, by = c('date' = 'date')) %>%
    left_join(income_modified, by = c('date' = 'date')) %>%
    left_join(cpi_modified, by = c('date' = 'date')) %>%
    left_join(mortgage_modified, by = c('date' = 'date')) %>%
    left_join(mortgage_change_modified, by = c('date' = 'date')) %>%
    left_join(house_modified, by = c('date' = 'date')) %>%
    left_join(consumption_modified, by = c('date' = 'date')) %>%
    left_join(sp500_modified, by = c('date' = 'date')) %>%
    left_join(dollar_index_modified, by = c('date' = 'date')) %>%
    left_join(recession_indicator_modified, by = c('date' = 'date'))

head(join)
## # A tibble: 6 × 13
##   date       consumer_sentiment unemployment_rate unemployment_rate_cha…¹ income
##   <date>                  <dbl>             <dbl>                   <dbl>  <dbl>
## 1 1978-01-01               83.7               6.4                   -14.7   4.28
## 2 1978-02-01               84.3               6.3                   -17.1   5.84
## 3 1978-03-01               78.8               6.3                   -14.9   5.24
## 4 1978-04-01               81.6               6.1                   -15.3   5.45
## 5 1978-05-01               82.9               6                     -14.3   5.36
## 6 1978-06-01               80                 5.9                   -18.1   4.93
## # ℹ abbreviated name: ¹​unemployment_rate_change
## # ℹ 8 more variables: cpi <dbl>, mortgage <dbl>, mortgage_change <dbl>,
## #   house <dbl>, consumption <dbl>, sp500 <dbl>, dollar_index <dbl>,
## #   recession_indicator <dbl>
# processing for modeling
final <- join[rowSums(is.na(join)) == 0,]

# visualize
final_pivoted <- final %>%
  mutate(`UMICH Consumer Sentiment` = consumer_sentiment,
         `Unemployment Rate` = unemployment_rate, 
         `YOY % Change in Unemployment Rate` = unemployment_rate_change,
         `YOY % Change in Median Real Disposable Income` = income, 
         `YOY % Change in Personal Consumption` = consumption,
         `YOY % Change in CPI` = cpi, 
         `YOY % Change in S&P 500` = sp500, 
         `YOY % Change in Dollar Index` = dollar_index,
         `Avg 30-year Mortgage Rate` = mortgage,
         `YOY % Change in Mortgage Rate` = mortgage_change,
         `YOY % Change in Median House Price` = house,
         `NBER Recession Indicator` = recession_indicator) %>%
  select(date, contains(" ")) %>%
  pivot_longer(cols = -c(date), 
               names_to = 'variable', values_to = 'value')

visual1 <- final_pivoted %>%
  filter(variable %in% c("UMICH Consumer Sentiment", 
                         "Unemployment Rate", 
                         "YOY % Change in Unemployment Rate", 
                         "YOY % Change in Median Real Disposable Income",
                         "YOY % Change in Personal Consumption",
                         "YOY % Change in CPI"))

visual2 <- final_pivoted %>%
  filter(variable %in% c("YOY % Change in S&P 500", 
                         "YOY % Change in Dollar Index", 
                         "Avg 30-year Mortgage Rate", 
                         "YOY % Change in Mortgage Rate",
                         "YOY % Change in Median House Price",
                         "NBER Recession Indicator"))

# create visuals
ggplot(visual1, aes(x = date, y = value, color = variable)) +
  geom_line() +
  labs(x = "", y = "Value", color = 'Variable', title = "Economic Indicators Over Time", caption = 'Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance | Visual by Michael Dunphy, @mtdunphy') +
  theme_minimal() +
  theme(legend.position = "none", plot.caption = element_text(size = 7, hjust=0)) +
  facet_wrap(~variable, scales = "free_y", ncol = 2)

ggplot(visual2, aes(x = date, y = value, color = variable)) +
  geom_line() +
  labs(x = "", y = "Value", color = 'Variable', title = "Economic Indicators Over Time", caption = 'Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance | Visual by Michael Dunphy, @mtdunphy') +
  theme_minimal() +
  theme(legend.position = "none", plot.caption = element_text(size = 7, hjust=0)) +
  facet_wrap(~variable, scales = "free_y", ncol = 2)

# eda

# correlation matrix
cor_matrix <- cor(final %>% select(-date))
chart.Correlation(cor_matrix)

# multivariate forecast: https://stackoverflow.com/questions/70175496/how-to-plot-my-multivariable-regression-time-series-model-in-r

train <- as_tsibble(final) %>%
    filter(date < '2017-01-01')
## Using `date` as index variable.
test <- as_tsibble(final) %>%
    filter(date >= '1978-01-01') 
## Using `date` as index variable.
fit <- train %>% 
    model(TSLM(consumer_sentiment ~ . - date - consumer_sentiment + season() + trend()))

check <- lm(consumer_sentiment ~ . - date, data = train)
summary(check)
## 
## Call:
## lm(formula = consumer_sentiment ~ . - date, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -20.2296  -3.5643   0.2821   3.6820  15.8167 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              105.609873   1.333508  79.197  < 2e-16 ***
## unemployment_rate         -4.662906   0.189088 -24.660  < 2e-16 ***
## unemployment_rate_change   0.010160   0.024373   0.417 0.676989    
## income                     0.607496   0.191557   3.171 0.001620 ** 
## cpi                       -3.501803   0.190017 -18.429  < 2e-16 ***
## mortgage                   1.674235   0.142533  11.746  < 2e-16 ***
## mortgage_change            0.007035   0.024968   0.282 0.778261    
## house                      0.231323   0.062763   3.686 0.000256 ***
## consumption                0.803399   0.234420   3.427 0.000665 ***
## sp500                      0.137487   0.019165   7.174 2.97e-12 ***
## dollar_index               0.082948   0.026574   3.121 0.001914 ** 
## recession_indicator       -5.001298   1.092625  -4.577 6.08e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.268 on 456 degrees of freedom
## Multiple R-squared:  0.8327, Adjusted R-squared:  0.8287 
## F-statistic: 206.3 on 11 and 456 DF,  p-value: < 2.2e-16
checkresiduals(check)

## 
##  Breusch-Godfrey test for serial correlation of order up to 15
## 
## data:  Residuals
## LM test = 226.52, df = 15, p-value < 2.2e-16
forecast_consumption <- forecast(fit, new_data = test)

forecast_consumption %>% 
  autoplot(as_tsibble(final)) + 
    labs(level = 'Level', x = "", y = "UMICH Consumer Sentiment", subtitle="Based on Economic Indicators from 1978-2016") +
    ggtitle("Expected vs Actual Consumer Sentiment from 1978 to 2023") +
    labs(caption = "Note: Economic indicators used in estimating consumer sentiment include unemployement rate,YOY % change in unemployment rate, 
YOY % change in median real disposable income, YOY % change in personal consumption, YOY % change in consumer price index, 
YOY % change in S&P 500 index, YOY % change in dollar index, Avg 30-year mortgage rate, YOY % change in mortgage rate, 
YOY % change in median house price, and NBER recession indicator. Training data is monthly from Jan 1978 to Jan 2017.
          
Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance | Visual by Michael Dunphy, @mtdunphy") +
          scale_x_date(date_breaks = '5 year', date_labels = "%Y") +
          theme_bw() +
          theme(plot.caption = element_text(size = 7, hjust=0), plot.subtitle = element_text(size = 10, hjust=0))
## Using `date` as index variable.

# narrow date range

start_date <- '2004-01-01'

plot_range <- as_tsibble(final) %>%
  filter(date >= start_date)
## Using `date` as index variable.
test <- as_tsibble(final) %>%
    filter(date >= start_date) 
## Using `date` as index variable.
fit <- train %>% 
    model(TSLM(consumer_sentiment ~ . - date - consumer_sentiment + season() + trend()))

forecast_consumption <- forecast(fit, new_data = test)

forecast_consumption %>% 
  autoplot(plot_range) + 
    labs(level = 'Level', x = "", y = "UMICH Consumer Sentiment", subtitle="Based on Economic Indicators from 1978-2016") +
    ggtitle("Expected vs Actual Consumer Sentiment from 2004 to 2023") +
    labs(caption = "Note: Economic indicators used in estimating consumer sentiment include unemployement rate,YOY % change in unemployment rate, 
YOY % change in median real disposable income, YOY % change in personal consumption, YOY % change in consumer price index, 
YOY % change in S&P 500 index, YOY % change in dollar index, Avg 30-year mortgage rate, YOY % change in mortgage rate, 
YOY % change in median house price, and NBER recession indicator. Training data is monthly from Jan 1978 to Jan 2017.
          
Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance | Visual by Michael Dunphy, @mtdunphy") +
          scale_x_date(date_breaks = '2 year', date_labels = "%Y") +
          theme_bw() +
          theme(plot.caption = element_text(size = 7, hjust=0), plot.subtitle = element_text(size = 10, hjust=0))

# adding google trends
# https://trends.google.com/trends/explore?cat=16&date=all&geo=US&q=recession&hl=en
# https://trends.google.com/trends/explore?cat=16&date=all&geo=US&q=inflation&hl=en
# https://trends.google.com/trends/explore?cat=16&date=all&geo=US&q=prices&hl=en
# news category is represented with 16, all categories is 0


# search1 <- gtrends(c("inflation"), category = 16, time= "all", geo = "US")$interest_over_time %>%
#   select(date, keyword, hits)
# 
# search2 <- gtrends(c("recession"), category = 16, time= "all", geo = "US")$interest_over_time %>%
#   select(date, keyword, hits)
# 
# search3 <- gtrends(c("prices"), category = 16, time= "all", geo = "US")$interest_over_time %>%
#   select(date, keyword, hits)
# 
# search <- union(search1, union(search2, search3))

search <- read.csv("GTrends Exported - All.csv") %>%
    mutate(date = as.Date(date))

search_pivoted <- search %>%
  pivot_wider(names_from = keyword, values_from = hits) %>%
  select(date, `inflation`, recession, prices)

p <- ggplot() + 
  geom_line(data = search, aes(x = date, y = hits, col = keyword)) +
  labs(x = "", y = "Interest over time", subtitle="Among All Categories in US, Normalized 0 to 100, Monthly", color = 'Keyword') +
  ggtitle("Google Search Trend for Keywords from 2004 to 2023") +
  labs(legend = 'Keyword', caption = "Source: Google | Visual by Michael Dunphy, @mtdunphy") +
  scale_x_date(date_breaks = '2 year', date_labels = "%Y") +
  theme_minimal() +
  theme(plot.caption = element_text(size = 7, hjust=0), plot.subtitle = element_text(size = 10, hjust=0))

p + facet_wrap(~keyword, scales = "free_y", ncol = 1)

# combine with economic indicators
combined <- final %>%
  filter(date >= '2004-01-01') %>%
  left_join(search_pivoted, by = c('date' = 'date'))

# correlation matrix
cor_matrix <- cor(combined %>% select(-date))
chart.Correlation(cor_matrix)

train <- as_tsibble(combined) %>%
    filter(date < '2017-01-01')
## Using `date` as index variable.
test <- as_tsibble(combined)
## Using `date` as index variable.
# testing with recession keyword
fit <- train %>% 
    model(TSLM(consumer_sentiment ~ . - date - consumer_sentiment - prices - `inflation` + season() + trend()))

check <- lm(consumer_sentiment ~ . - date - prices - `inflation`, data = train)
summary(check)
## 
## Call:
## lm(formula = consumer_sentiment ~ . - date - prices - inflation, 
##     data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.6070  -2.8256   0.1031   2.8477  10.9710 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              107.02620    4.50643  23.750  < 2e-16 ***
## unemployment_rate         -4.15784    0.35312 -11.775  < 2e-16 ***
## unemployment_rate_change   0.09639    0.06072   1.587  0.11463    
## income                    -0.20786    0.29192  -0.712  0.47758    
## cpi                       -3.72856    0.58045  -6.424 1.85e-09 ***
## mortgage                   0.05958    0.82935   0.072  0.94283    
## mortgage_change           -0.07378    0.04583  -1.610  0.10961    
## house                      0.43949    0.09999   4.395 2.14e-05 ***
## consumption                1.85610    0.67211   2.762  0.00651 ** 
## sp500                      0.10686    0.04869   2.195  0.02978 *  
## dollar_index               0.01453    0.06891   0.211  0.83334    
## recession_indicator       -6.78566    2.53624  -2.675  0.00833 ** 
## recession                 -0.02716    0.08122  -0.334  0.73855    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.706 on 143 degrees of freedom
## Multiple R-squared:  0.8442, Adjusted R-squared:  0.8311 
## F-statistic: 64.55 on 12 and 143 DF,  p-value: < 2.2e-16
checkresiduals(check)

## 
##  Breusch-Godfrey test for serial correlation of order up to 16
## 
## data:  Residuals
## LM test = 51.108, df = 16, p-value = 1.525e-05
forecast_consumption <- forecast(fit, new_data = test)

forecast_consumption %>% 
  autoplot(combined) + 
    labs(level = 'Level', x = "", y = "UMICH Consumer Sentiment", subtitle="Based on Economic Indicators + Google Search Trend: 'Recession' among All Categories") +
    ggtitle("Expected vs Actual Consumer Sentiment from 2004 to 2023") +
    labs(caption = "Note: Google search trend is for keyword 'recession' among all categories in the US. Economic indicators used in estimating consumer 
sentiment include unemployement rate,YOY % change in unemployment rate, YOY % change in median real disposable income, YOY % 
change in personal consumption, YOY % change in consumer price index, YOY % change in S&P 500 index, YOY % change in dollar 
index, Avg 30-year mortgage rate, YOY % change in mortgage rate, YOY % change in median house price, and NBER recession indicator. 
Training data is monthly from Jan 2004 to Jan 2017.
          
Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance, Google | Visual by Michael Dunphy, @mtdunphy") +
          scale_x_date(date_breaks = '2 year', date_labels = "%Y") +
          theme_bw() +
          theme(plot.caption = element_text(size = 7, hjust=0), plot.subtitle = element_text(size = 10, hjust=0))
## Using `date` as index variable.

# testing with prices keyword
fit <- train %>% 
    model(TSLM(consumer_sentiment ~ . - date - consumer_sentiment - recession - `inflation` + season() + trend()))

check <- lm(consumer_sentiment ~ . - date - recession - `inflation`, data = train)
summary(check)
## 
## Call:
## lm(formula = consumer_sentiment ~ . - date - recession - inflation, 
##     data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.4683  -2.8472   0.1885   2.8250  11.0420 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              108.24667    5.64205  19.186  < 2e-16 ***
## unemployment_rate         -4.19130    0.34419 -12.177  < 2e-16 ***
## unemployment_rate_change   0.09500    0.06030   1.575  0.11738    
## income                    -0.19989    0.29291  -0.682  0.49607    
## cpi                       -3.69443    0.58810  -6.282 3.79e-09 ***
## mortgage                   0.05572    0.82687   0.067  0.94637    
## mortgage_change           -0.07389    0.04581  -1.613  0.10900    
## house                      0.45921    0.09889   4.644 7.68e-06 ***
## consumption                1.87013    0.67204   2.783  0.00612 ** 
## sp500                      0.10285    0.04968   2.070  0.04023 *  
## dollar_index               0.01466    0.06888   0.213  0.83172    
## recession_indicator       -7.08393    2.20053  -3.219  0.00159 ** 
## prices                    -0.02446    0.05650  -0.433  0.66573    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.704 on 143 degrees of freedom
## Multiple R-squared:  0.8442, Adjusted R-squared:  0.8312 
## F-statistic: 64.59 on 12 and 143 DF,  p-value: < 2.2e-16
checkresiduals(check)

## 
##  Breusch-Godfrey test for serial correlation of order up to 16
## 
## data:  Residuals
## LM test = 50.726, df = 16, p-value = 1.755e-05
forecast_consumption <- forecast(fit, new_data = test)

forecast_consumption %>% 
  autoplot(combined) + 
    labs(level = 'Level', x = "", y = "UMICH Consumer Sentiment", subtitle="Based on Economic Indicators + Google Search Trend: 'Prices' among All Categories") +
    ggtitle("Expected vs Actual Consumer Sentiment from 2004 to 2023") +
    labs(caption = "Note: Google search trend is for keyword 'prices' among all categories in the US. Economic indicators used in estimating consumer 
sentiment include unemployement rate,YOY % change in unemployment rate, YOY % change in median real disposable income, YOY % 
change in personal consumption, YOY % change in consumer price index, YOY % change in S&P 500 index, YOY % change in dollar 
index, Avg 30-year mortgage rate, YOY % change in mortgage rate, YOY % change in median house price, and NBER recession indicator. 
Training data is monthly from Jan 2004 to Jan 2017.
          
Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance, Google | Visual by Michael Dunphy, @mtdunphy") +
          scale_x_date(date_breaks = '2 year', date_labels = "%Y") +
          theme_bw() +
          theme(plot.caption = element_text(size = 7, hjust=0), plot.subtitle = element_text(size = 10, hjust=0))
## Using `date` as index variable.

# testing with inflation keyword
fit <- train %>% 
    model(TSLM(consumer_sentiment ~ . - date - consumer_sentiment - prices - recession + season() + trend()))

check <- lm(consumer_sentiment ~ . - date - prices - recession, data = train)
summary(check)
## 
## Call:
## lm(formula = consumer_sentiment ~ . - date - prices - recession, 
##     data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.3392  -2.8056   0.1503   2.8231  10.8724 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              107.05913    5.24337  20.418  < 2e-16 ***
## unemployment_rate         -4.18978    0.34685 -12.080  < 2e-16 ***
## unemployment_rate_change   0.09571    0.06221   1.539  0.12611    
## income                    -0.20835    0.29331  -0.710  0.47865    
## cpi                       -3.73908    0.58026  -6.444 1.67e-09 ***
## mortgage                   0.08814    0.82606   0.107  0.91518    
## mortgage_change           -0.07290    0.04655  -1.566  0.11952    
## house                      0.45011    0.09660   4.660 7.18e-06 ***
## consumption                1.88205    0.69449   2.710  0.00755 ** 
## sp500                      0.10675    0.04883   2.186  0.03044 *  
## dollar_index               0.01520    0.06892   0.221  0.82575    
## recession_indicator       -7.16622    2.22635  -3.219  0.00159 ** 
## inflation                 -0.01532    0.12986  -0.118  0.90623    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.707 on 143 degrees of freedom
## Multiple R-squared:  0.8441, Adjusted R-squared:  0.831 
## F-statistic:  64.5 on 12 and 143 DF,  p-value: < 2.2e-16
checkresiduals(check)

## 
##  Breusch-Godfrey test for serial correlation of order up to 16
## 
## data:  Residuals
## LM test = 51.552, df = 16, p-value = 1.294e-05
forecast_consumption <- forecast(fit, new_data = test)

forecast_consumption %>% 
  autoplot(combined) + 
    labs(level = 'Level', x = "", y = "UMICH Consumer Sentiment", subtitle="Based on Economic Indicators + Google Search Trend: 'Inflation' among All Categories") +
    ggtitle("Expected vs Actual Consumer Sentiment from 2004 to 2023") +
    labs(caption = "Note: Google search trend is for keyword 'inflation' among all categories in the US. Economic indicators used in estimating consumer 
sentiment include unemployement rate,YOY % change in unemployment rate, YOY % change in median real disposable income, YOY % 
change in personal consumption, YOY % change in consumer price index, YOY % change in S&P 500 index, YOY % change in dollar 
index, Avg 30-year mortgage rate, YOY % change in mortgage rate, YOY % change in median house price, and NBER recession indicator. 
Training data is monthly from Jan 2004 to Jan 2017.
          
Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance, Google | Visual by Michael Dunphy, @mtdunphy") +
          scale_x_date(date_breaks = '2 year', date_labels = "%Y") +
          theme_bw() +
          theme(plot.caption = element_text(size = 7, hjust=0), plot.subtitle = element_text(size = 10, hjust=0))
## Using `date` as index variable.

search <- read.csv("GTrends Exported - News.csv") %>%
    mutate(date = as.Date(date))

search_pivoted <- search %>%
  pivot_wider(names_from = keyword, values_from = hits) %>%
  select(date, `inflation`, recession, prices)

p <- ggplot() + 
  geom_line(data = search, aes(x = date, y = hits, col = keyword)) +
  labs(x = "", y = "Interest over time", subtitle="Among News Categories, Monthly, Normalized 0 to 100", color = 'Keyword') +
  ggtitle("Google Search Trend for Keywords from 2004 to 2023") +
  labs(legend = 'Keyword', caption = "Source: Google | Visual by Michael Dunphy, @mtdunphy") +
  scale_x_date(date_breaks = '2 year', date_labels = "%Y") +
  theme_minimal() +
  theme(plot.caption = element_text(size = 7, hjust=0))

p + facet_wrap(~keyword, scales = "free_y", ncol = 1)

# combine with economic indicators
combined <- final %>%
  filter(date >= '2004-01-01') %>%
  left_join(search_pivoted, by = c('date' = 'date'))

# correlation matrix
cor_matrix <- cor(combined %>% select(-date))
chart.Correlation(cor_matrix)

train <- as_tsibble(combined) %>%
    filter(date < '2017-01-01')
## Using `date` as index variable.
test <- as_tsibble(combined)
## Using `date` as index variable.
# testing with recession keyword
fit <- train %>% 
    model(TSLM(consumer_sentiment ~ . - date - consumer_sentiment - prices - `inflation` + season() + trend()))

check <- lm(consumer_sentiment ~ . - date - prices - `inflation`, data = train)
summary(check)
## 
## Call:
## lm(formula = consumer_sentiment ~ . - date - prices - inflation, 
##     data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -11.456  -2.829   0.167   2.924  12.937 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              107.416357   4.431909  24.237  < 2e-16 ***
## unemployment_rate         -4.115350   0.347044 -11.858  < 2e-16 ***
## unemployment_rate_change   0.098874   0.060120   1.645  0.10225    
## income                    -0.206266   0.290329  -0.710  0.47858    
## cpi                       -3.750289   0.577231  -6.497 1.27e-09 ***
## mortgage                   0.018414   0.819977   0.022  0.98211    
## mortgage_change           -0.075323   0.045627  -1.651  0.10096    
## house                      0.413711   0.099828   4.144 5.81e-05 ***
## consumption                1.882778   0.668981   2.814  0.00558 ** 
## sp500                      0.104100   0.048513   2.146  0.03358 *  
## dollar_index               0.005969   0.068972   0.087  0.93116    
## recession_indicator       -5.772795   2.474966  -2.332  0.02107 *  
## recession                 -0.075282   0.062077  -1.213  0.22724    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.683 on 143 degrees of freedom
## Multiple R-squared:  0.8456, Adjusted R-squared:  0.8327 
## F-statistic: 65.28 on 12 and 143 DF,  p-value: < 2.2e-16
checkresiduals(check)

## 
##  Breusch-Godfrey test for serial correlation of order up to 16
## 
## data:  Residuals
## LM test = 50.55, df = 16, p-value = 1.873e-05
forecast_consumption <- forecast(fit, new_data = test)

forecast_consumption %>% 
  autoplot(combined) + 
      labs(level = 'Level', x = "", y = "UMICH Consumer Sentiment", subtitle="Based on Economic Indicators + Google Search Trend: 'Recession' among News Categories") +
      ggtitle("Expected vs Actual Consumer Sentiment from 2004 to 2023") +
      labs(caption = "Note: Google search trend is for keyword 'recession' among all categories in the US. Economic indicators used in estimating consumer 
sentiment include unemployement rate,YOY % change in unemployment rate, YOY % change in median real disposable income, YOY % 
change in personal consumption, YOY % change in consumer price index, YOY % change in S&P 500 index, YOY % change in dollar 
index, Avg 30-year mortgage rate, YOY % change in mortgage rate, YOY % change in median house price, and NBER recession indicator. 
Training data is monthly from Jan 2004 to Jan 2017.
          
Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance, Google | Visual by Michael Dunphy, @mtdunphy") +
          scale_x_date(date_breaks = '2 year', date_labels = "%Y") +
          theme_bw() +
          theme(plot.caption = element_text(size = 7, hjust=0), plot.subtitle = element_text(size = 10, hjust=0))
## Using `date` as index variable.

# testing with prices keyword
fit <- train %>% 
    model(TSLM(consumer_sentiment ~ . - date - consumer_sentiment - recession - `inflation` + season() + trend()))

check <- lm(consumer_sentiment ~ . - date - consumer_sentiment - recession - `inflation`, data = train)
summary(check)
## 
## Call:
## lm(formula = consumer_sentiment ~ . - date - consumer_sentiment - 
##     recession - inflation, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.3433  -2.8328   0.1275   2.8448  11.0302 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              106.902183   4.638137  23.049  < 2e-16 ***
## unemployment_rate         -4.188395   0.345395 -12.126  < 2e-16 ***
## unemployment_rate_change   0.094739   0.060656   1.562  0.12052    
## income                    -0.206963   0.294427  -0.703  0.48324    
## cpi                       -3.724944   0.588433  -6.330 2.97e-09 ***
## mortgage                   0.097388   0.821507   0.119  0.90580    
## mortgage_change           -0.073789   0.045845  -1.610  0.10970    
## house                      0.451091   0.097663   4.619 8.52e-06 ***
## consumption                1.864547   0.672608   2.772  0.00631 ** 
## sp500                      0.106155   0.049387   2.149  0.03328 *  
## dollar_index               0.015547   0.069026   0.225  0.82212    
## recession_indicator       -7.144126   2.262419  -3.158  0.00194 ** 
## prices                    -0.006225   0.049888  -0.125  0.90088    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.707 on 143 degrees of freedom
## Multiple R-squared:  0.8441, Adjusted R-squared:  0.831 
## F-statistic:  64.5 on 12 and 143 DF,  p-value: < 2.2e-16
checkresiduals(check)

## 
##  Breusch-Godfrey test for serial correlation of order up to 16
## 
## data:  Residuals
## LM test = 51.312, df = 16, p-value = 1.414e-05
forecast_consumption <- forecast(fit, new_data = test)

forecast_consumption %>% 
  autoplot(combined) + 
      labs(level = 'Level', x = "", y = "UMICH Consumer Sentiment", subtitle="Based on Economic Indicators + Google Search Trend: 'Prices' among News Categories") +
      ggtitle("Expected vs Actual Consumer Sentiment from 2004 to 2023") +
      labs(caption = "Note: Google search trend is for keyword 'prices' among all categories in the US. Economic indicators used in estimating consumer 
sentiment include unemployement rate,YOY % change in unemployment rate, YOY % change in median real disposable income, YOY % 
change in personal consumption, YOY % change in consumer price index, YOY % change in S&P 500 index, YOY % change in dollar 
index, Avg 30-year mortgage rate, YOY % change in mortgage rate, YOY % change in median house price, and NBER recession indicator. 
Training data is monthly from Jan 2004 to Jan 2017.
          
Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance, Google | Visual by Michael Dunphy, @mtdunphy") +
          scale_x_date(date_breaks = '2 year', date_labels = "%Y") +
          theme_bw() +
          theme(plot.caption = element_text(size = 7, hjust=0), plot.subtitle = element_text(size = 10, hjust=0))
## Using `date` as index variable.

# testing with inflation keyword
fit <- train %>% 
    model(TSLM(consumer_sentiment ~ . - date - consumer_sentiment - prices - recession + season() + trend()))

check <- lm(consumer_sentiment ~ . - date - consumer_sentiment - prices - recession, data = train)
summary(check)
## 
## Call:
## lm(formula = consumer_sentiment ~ . - date - consumer_sentiment - 
##     prices - recession, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.3353  -2.8211   0.1454   2.8316  10.8359 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              106.71344    4.41647  24.163  < 2e-16 ***
## unemployment_rate         -4.18158    0.34410 -12.152  < 2e-16 ***
## unemployment_rate_change   0.09878    0.06235   1.584  0.11536    
## income                    -0.21077    0.29171  -0.723  0.47115    
## cpi                       -3.75308    0.58221  -6.446 1.65e-09 ***
## mortgage                   0.16827    0.85261   0.197  0.84383    
## mortgage_change           -0.07361    0.04584  -1.606  0.11051    
## house                      0.45507    0.09812   4.638 7.88e-06 ***
## consumption                1.93334    0.71214   2.715  0.00745 ** 
## sp500                      0.10415    0.04969   2.096  0.03784 *  
## dollar_index               0.01335    0.06912   0.193  0.84709    
## recession_indicator       -7.01996    2.27526  -3.085  0.00244 ** 
## inflation                 -0.03210    0.10527  -0.305  0.76086    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.706 on 143 degrees of freedom
## Multiple R-squared:  0.8441, Adjusted R-squared:  0.8311 
## F-statistic: 64.54 on 12 and 143 DF,  p-value: < 2.2e-16
checkresiduals(check)

## 
##  Breusch-Godfrey test for serial correlation of order up to 16
## 
## data:  Residuals
## LM test = 50.861, df = 16, p-value = 1.67e-05
forecast_consumption <- forecast(fit, new_data = test)

forecast_consumption %>% 
  autoplot(combined) + 
    labs(level = 'Level', x = "", y = "UMICH Consumer Sentiment", subtitle="Based on Economic Indicators + Google Search Trend: 'Inflation' among News Categories") +
    ggtitle("Expected vs Actual Consumer Sentiment from 2004 to 2023") +
    labs(caption = "Note: Google search trend is for keyword 'inflation' among all categories in the US. Economic indicators used in estimating consumer 
sentiment include unemployement rate,YOY % change in unemployment rate, YOY % change in median real disposable income, YOY % 
change in personal consumption, YOY % change in consumer price index, YOY % change in S&P 500 index, YOY % change in dollar 
index, Avg 30-year mortgage rate, YOY % change in mortgage rate, YOY % change in median house price, and NBER recession indicator. 
Training data is monthly from Jan 2004 to Jan 2017.
          
Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance, Google | Visual by Michael Dunphy, @mtdunphy") +
          scale_x_date(date_breaks = '2 year', date_labels = "%Y") +
          theme_bw() +
          theme(plot.caption = element_text(size = 7, hjust=0), plot.subtitle = element_text(size = 10, hjust=0))
## Using `date` as index variable.